Model Diagnostics and Posterior Predictive Check: Subgroups Model

pacman::p_load(MetaStan, cmdstanr, dplyr, tidyr, rio, here, data.table,
               posterior, ggdist, ggplot2, bayesplot)

# Set Stan options
options(mc.cores = parallel::detectCores())
color_scheme_set("red")

Data

dat = import(here("data/raw_data.xlsx"))

setnames(dat, old=c("Int_Events","Control_Events"), new=c("r2", "r1"))
setnames(dat, old=c("Int_Total","Control_Total"), new=c("n2", "n1"))

J <- nrow(dat)  # Number of studies (84)
y <- as.matrix(dat[, c("r1", "r2")])  # Events: control (r1), treatment (r2)
sample <- as.matrix(dat[, c("n1", "n2")])  # Sample sizes: control (n1), treatment (n2)
zero <- c(0, 0)  # Zero vector for random effects mean
subgroup <- dat[, "Low_Dose"]

# Create the Stan data list
stan_data <- list(
  J = J,
  zero = zero,
  y = y,
  sample = sample,
  subgroup = subgroup
)

Model

# Compile the Stan model
zibglmm1 <- cmdstan_model(here("models/stan/zibglmm_subgroups_model1.stan"))
csv_files <- list.files(here("models/storage"),
                        pattern = "^zibglmm_subgroups_model1.*\\.csv$",
                        full.names = TRUE)


if (length(csv_files) > 0 && all(file.exists(csv_files))) {
  # Load the fit1 from CSV files
  fit1 <- cmdstanr::as_cmdstan_fit(csv_files)
  
} else {
  # fit1 the model if no CSV files are found
  fit1 <- zibglmm1$sample(
    data = stan_data,
    chains = 4,
    parallel_chains = 4,
    iter_warmup = 2000,
    iter_sampling = 2000,
    adapt_delta = 0.99,  
    max_treedepth = 15,  
    seed = 123,
    refresh = 0
)
  
  # Save CSV files to persistent directory
  fit1$save_output_files(dir = "models/storage")
}

Diagnostics

draws_diagnostics <- fit1$draws(variables = c("mu", "sigma_nu", "pi", "beta"))

summary_df <- fit1$summary(variables = c("mu", "sigma_nu", "pi", "beta"))

print(summary_df[, c("variable", "ess_bulk", "ess_tail", "rhat")])
# A tibble: 7 × 4
  variable    ess_bulk ess_tail  rhat
  <chr>          <dbl>    <dbl> <dbl>
1 mu[1]          3811.    4995.  1.00
2 mu[2]          3372.    4280.  1.00
3 sigma_nu[1]    6367.    5862.  1.00
4 sigma_nu[2]    4268.    4895.  1.00
5 pi             6017.    4859.  1.00
6 beta[1]        7461.    6591.  1.00
7 beta[2]        5817.    5792.  1.00
fit1$diagnostic_summary()
$num_divergent
[1] 0 0 0 0

$num_max_treedepth
[1] 0 0 0 0

$ebfmi
[1] 0.9502290 0.9472499 0.9448076 0.9150427

Check for convergence and mixing of chains

mcmc_trace(draws_diagnostics, pars = c("mu[1]", "mu[2]", "sigma_nu[1]",
                                       "sigma_nu[2]", "pi", "beta[1]", "beta[2]")) +
  ggtitle("Trace Plots for Key Parameters")

Check autocorrelation for key parameters

mcmc_acf(draws_diagnostics, pars = c("mu[1]", "mu[2]", "sigma_nu[1]",
                                     "sigma_nu[2]", "pi",  "beta[1]", "beta[2]"),
         lags = 10) +
  ggtitle("Autocorrelation")

Posterior Predictive Check

A posterior predictive check (PPC) is a Bayesian method used to assess how well a statistical model fit1s the observed data by comparing it to data simulated from the model. It involves generating replicated datasets (\(y_{\text{rep}}\)) from the posterior predictive distribution, which combines the fit1ted model parameters with new random noise. Test statistics (e.g., means, variances, or proportions) are computed for both the observed data (\(y_{\text{obs}}\)) and the replicated data. By comparing these statistics—through visualizations (e.g., histograms) or numerical summaries (e.g., posterior p-values)—PPCs evaluate whether the model can reproduce key features of the data. If the observed statistics align with the replicated ones, the model is considered a good fit1; significant discrepancies suggest model misspecification.

Posterior predictive p-values (also called Bayesian p-values) are a useful way to quantify the fit1 of your model in a posterior predictive check (PPC). They measure the probability that a test statistic computed from the replicated data (y_rep) is more extreme than the same test statistic computed from the observed data (y_obs). A Bayesian p-value close to 0 or 1 indicates a poor fit1, while a value around 0.5 suggests the model captures the observed data well for that statistic.

For a test statistic \(T(y)\), the posterior predictive p-value is defined as:

\(p = P(T(y_{\text{rep}}) \geq T(y_{\text{obs}}) \mid y_{\text{obs}})\)

where \(y_{\text{rep}}\) is the replicated data from the posterior predictive distribution, and \(y_{\text{obs}}\) is the observed data.

# Extract observed and replicated data
y_obs <- as.matrix(dat[, c("r1", "r2")])  # Observed data (e.g., r1 for treatment, r2 for control)
y_rep <- fit1$draws("y_rep")  # Posterior predictive samples
y_rep <- as_draws_matrix(y_rep)  # Convert to matrix
J <- nrow(dat)  # Number of studies
n_samples <- nrow(y_rep)  # Number of posterior draws

# Extract subgroup covariate
subgroup <- dat$Low_Dose  # Binary vector (0 or 1) for each study

# Extract model parameters for probability computation
draws1 <- fit1$draws(format = "df")
mu_control <- draws1$`mu[1]`
mu_treatment <- draws1$`mu[2]`
beta_control <- draws1$`beta[1]`
beta_treatment <- draws1$`beta[2]`

# Compute empirical probabilities
empirical_probabilities <- dat |> 
  reframe(Study = factor(1:nrow(dat)), 
          p1 = r1/n1, 
          p2 = r2/n2, 
          Subgroup = factor(subgroup, levels = c(0, 1), labels = c("Subgroup 0", "Subgroup 1")))

1. Proportion of double zeros

# 1. PPC: Proportion of double zeros
double_zeros_obs_sub0 <- sum(y_obs[subgroup == 0, 1] == 0 & y_obs[subgroup == 0, 2] == 0) / sum(subgroup == 0)
double_zeros_obs_sub1 <- sum(y_obs[subgroup == 1, 1] == 0 & y_obs[subgroup == 1, 2] == 0) / sum(subgroup == 1)

double_zeros_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  sum(y_mat[subgroup == 0, 1] == 0 & y_mat[subgroup == 0, 2] == 0) / sum(subgroup == 0)
})

double_zeros_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  sum(y_mat[subgroup == 1, 1] == 0 & y_mat[subgroup == 1, 2] == 0) / sum(subgroup == 1)
})

double_zeros_rep_sub0 <- as.matrix(double_zeros_rep_sub0)
double_zeros_rep_sub1 <- as.matrix(double_zeros_rep_sub1)

p_value_double_zeros_sub0 <- mean(double_zeros_rep_sub0 >= double_zeros_obs_sub0)
p_value_double_zeros_sub1 <- mean(double_zeros_rep_sub1 >= double_zeros_obs_sub1)
ppc_stat(double_zeros_obs_sub0, double_zeros_rep_sub0, stat = "identity",
         binwidth = 0.03) +
  ggtitle("PPC: Proportion of Double-Zero Studies (Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_double_zeros_sub0, 3))) +
  xlab("Proportion of Double-Zero Studies")

ppc_stat(double_zeros_obs_sub1, double_zeros_rep_sub1, stat = "identity") +
  ggtitle("PPC: Proportion of Double-Zero Studies (Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_double_zeros_sub1, 3))) +
  xlab("Proportion of Double-Zero Studies")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2. Mean event rates

mean_rate_control_obs_sub0 <- mean(y_obs[subgroup == 0, 1] / dat$n1[subgroup == 0])
mean_rate_control_obs_sub1 <- mean(y_obs[subgroup == 1, 1] / dat$n1[subgroup == 1])
mean_rate_treatment_obs_sub0 <- mean(y_obs[subgroup == 0, 2] / dat$n2[subgroup == 0])
mean_rate_treatment_obs_sub1 <- mean(y_obs[subgroup == 1, 2] / dat$n2[subgroup == 1])

mean_rate_control_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  mean(y_mat[subgroup == 0, 1] / dat$n1[subgroup == 0])
})
mean_rate_control_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  mean(y_mat[subgroup == 1, 1] / dat$n1[subgroup == 1])
})
mean_rate_treatment_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  mean(y_mat[subgroup == 0, 2] / dat$n2[subgroup == 0])
})
mean_rate_treatment_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  mean(y_mat[subgroup == 1, 2] / dat$n2[subgroup == 1])
})

mean_rate_control_rep_sub0 <- as.matrix(mean_rate_control_rep_sub0)
mean_rate_control_rep_sub1 <- as.matrix(mean_rate_control_rep_sub1)
mean_rate_treatment_rep_sub0 <- as.matrix(mean_rate_treatment_rep_sub0)
mean_rate_treatment_rep_sub1 <- as.matrix(mean_rate_treatment_rep_sub1)

p_value_mean_rate_control_sub0 <- mean(mean_rate_control_rep_sub0 >= mean_rate_control_obs_sub0)
p_value_mean_rate_control_sub1 <- mean(mean_rate_control_rep_sub1 >= mean_rate_control_obs_sub1)
p_value_mean_rate_treatment_sub0 <- mean(mean_rate_treatment_rep_sub0 >= mean_rate_treatment_obs_sub0)
p_value_mean_rate_treatment_sub1 <- mean(mean_rate_treatment_rep_sub1 >= mean_rate_treatment_obs_sub1)
ppc_stat(mean_rate_control_obs_sub0, mean_rate_control_rep_sub0, stat = "identity") +
  ggtitle("PPC: Mean Event Rate (Control, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_mean_rate_control_sub0, 3))) +
  xlab("Mean Event Rate (Control)")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(mean_rate_control_obs_sub1, mean_rate_control_rep_sub1, stat = "identity") +
  ggtitle("PPC: Mean Event Rate (Control, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_mean_rate_control_sub1, 3))) +
  xlab("Mean Event Rate (Control)")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(mean_rate_treatment_obs_sub0, mean_rate_treatment_rep_sub0, stat = "identity") +
  ggtitle("PPC: Mean Event Rate (Treatment, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_mean_rate_treatment_sub0, 3))) +
  xlab("Mean Event Rate (Treatment)")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(mean_rate_treatment_obs_sub1, mean_rate_treatment_rep_sub1, stat = "identity") +
  ggtitle("PPC: Mean Event Rate (Treatment, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_mean_rate_treatment_sub1, 3))) +
  xlab("Mean Event Rate (Treatment)")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3. Total event counts

total_events_control_obs_sub0 <- sum(y_obs[subgroup == 0, 1])
total_events_control_obs_sub1 <- sum(y_obs[subgroup == 1, 1])
total_events_treatment_obs_sub0 <- sum(y_obs[subgroup == 0, 2])
total_events_treatment_obs_sub1 <- sum(y_obs[subgroup == 1, 2])

total_events_control_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  sum(y_mat[subgroup == 0, 1])
})
total_events_control_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  sum(y_mat[subgroup == 1, 1])
})
total_events_treatment_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  sum(y_mat[subgroup == 0, 2])
})
total_events_treatment_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  sum(y_mat[subgroup == 1, 2])
})

total_events_control_rep_sub0 <- as.matrix(total_events_control_rep_sub0)
total_events_control_rep_sub1 <- as.matrix(total_events_control_rep_sub1)
total_events_treatment_rep_sub0 <- as.matrix(total_events_treatment_rep_sub0)
total_events_treatment_rep_sub1 <- as.matrix(total_events_treatment_rep_sub1)

p_value_total_control_sub0 <- mean(total_events_control_rep_sub0 >= total_events_control_obs_sub0)
p_value_total_control_sub1 <- mean(total_events_control_rep_sub1 >= total_events_control_obs_sub1)
p_value_total_treatment_sub0 <- mean(total_events_treatment_rep_sub0 >= total_events_treatment_obs_sub0)
p_value_total_treatment_sub1 <- mean(total_events_treatment_rep_sub1 >= total_events_treatment_obs_sub1)
ppc_stat(total_events_control_obs_sub0, total_events_control_rep_sub0, stat = "identity") +
  ggtitle("PPC: Total Events (Control, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_total_control_sub0, 3))) +
  xlab("Total Events in Control Group")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(total_events_control_obs_sub1, total_events_control_rep_sub1, stat = "identity") +
  ggtitle("PPC: Total Events (Control, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_total_control_sub1, 3))) +
  xlab("Total Events in Control Group")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(total_events_treatment_obs_sub0, total_events_treatment_rep_sub0, stat = "identity") +
  ggtitle("PPC: Total Events (Treatment, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_total_treatment_sub0, 3))) +
  xlab("Total Events in Treatment Group")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(total_events_treatment_obs_sub1, total_events_treatment_rep_sub1, stat = "identity") +
  ggtitle("PPC: Total Events (Treatment, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_total_treatment_sub1, 3))) +
  xlab("Total Events in Treatment Group")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

4. Proportion of zero events per arm

prop_zeros_control_obs_sub0 <- mean(y_obs[subgroup == 0, 1] == 0)
prop_zeros_control_obs_sub1 <- mean(y_obs[subgroup == 1, 1] == 0)
prop_zeros_treatment_obs_sub0 <- mean(y_obs[subgroup == 0, 2] == 0)
prop_zeros_treatment_obs_sub1 <- mean(y_obs[subgroup == 1, 2] == 0)

prop_zeros_control_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  mean(y_mat[subgroup == 0, 1] == 0)
})
prop_zeros_control_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  mean(y_mat[subgroup == 1, 1] == 0)
})
prop_zeros_treatment_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  mean(y_mat[subgroup == 0, 2] == 0)
})
prop_zeros_treatment_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  mean(y_mat[subgroup == 1, 2] == 0)
})

prop_zeros_control_rep_sub0 <- as.matrix(prop_zeros_control_rep_sub0)
prop_zeros_control_rep_sub1 <- as.matrix(prop_zeros_control_rep_sub1)
prop_zeros_treatment_rep_sub0 <- as.matrix(prop_zeros_treatment_rep_sub0)
prop_zeros_treatment_rep_sub1 <- as.matrix(prop_zeros_treatment_rep_sub1)

p_value_prop_zeros_control_sub0 <- mean(prop_zeros_control_rep_sub0 >= prop_zeros_control_obs_sub0)
p_value_prop_zeros_control_sub1 <- mean(prop_zeros_control_rep_sub1 >= prop_zeros_control_obs_sub1)
p_value_prop_zeros_treatment_sub0 <- mean(prop_zeros_treatment_rep_sub0 >= prop_zeros_treatment_obs_sub0)
p_value_prop_zeros_treatment_sub1 <- mean(prop_zeros_treatment_rep_sub1 >= prop_zeros_treatment_obs_sub1)
ppc_stat(prop_zeros_control_obs_sub0, prop_zeros_control_rep_sub0, stat = "identity",
         binwidth = 0.03) +
  ggtitle("PPC: Proportion of Zero Events (Control, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_prop_zeros_control_sub0, 3))) +
  xlab("Proportion of Zero Events in Control Group")

ppc_stat(prop_zeros_control_obs_sub1, prop_zeros_control_rep_sub1, stat = "identity",
         binwidth = 0.02) +
  ggtitle("PPC: Proportion of Zero Events (Control, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_prop_zeros_control_sub1, 3))) +
  xlab("Proportion of Zero Events in Control Group")

ppc_stat(prop_zeros_treatment_obs_sub0, prop_zeros_treatment_rep_sub0, stat = "identity",
         binwidth = 0.03) +
  ggtitle("PPC: Proportion of Zero Events (Treatment, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_prop_zeros_treatment_sub0, 3))) +
  xlab("Proportion of Zero Events in Treatment Group")

ppc_stat(prop_zeros_treatment_obs_sub1, prop_zeros_treatment_rep_sub1, stat = "identity",
         binwidth = 0.02) +
  ggtitle("PPC: Proportion of Zero Events (Treatment, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_prop_zeros_treatment_sub1, 3))) +
  xlab("Proportion of Zero Events in Treatment Group")

5. Variance of event rates

var_rate_control_obs_sub0 <- var(y_obs[subgroup == 0, 1] / dat$n1[subgroup == 0])
var_rate_control_obs_sub1 <- var(y_obs[subgroup == 1, 1] / dat$n1[subgroup == 1])
var_rate_treatment_obs_sub0 <- var(y_obs[subgroup == 0, 2] / dat$n2[subgroup == 0])
var_rate_treatment_obs_sub1 <- var(y_obs[subgroup == 1, 2] / dat$n2[subgroup == 1])

var_rate_control_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  var(y_mat[subgroup == 0, 1] / dat$n1[subgroup == 0])
})
var_rate_control_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  var(y_mat[subgroup == 1, 1] / dat$n1[subgroup == 1])
})
var_rate_treatment_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  var(y_mat[subgroup == 0, 2] / dat$n2[subgroup == 0])
})
var_rate_treatment_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  var(y_mat[subgroup == 1, 2] / dat$n2[subgroup == 1])
})

var_rate_control_rep_sub0 <- as.matrix(var_rate_control_rep_sub0)
var_rate_control_rep_sub1 <- as.matrix(var_rate_control_rep_sub1)
var_rate_treatment_rep_sub0 <- as.matrix(var_rate_treatment_rep_sub0)
var_rate_treatment_rep_sub1 <- as.matrix(var_rate_treatment_rep_sub1)

p_value_var_rate_control_sub0 <- mean(var_rate_control_rep_sub0 >= var_rate_control_obs_sub0)
p_value_var_rate_control_sub1 <- mean(var_rate_control_rep_sub1 >= var_rate_control_obs_sub1)
p_value_var_rate_treatment_sub0 <- mean(var_rate_treatment_rep_sub0 >= var_rate_treatment_obs_sub0)
p_value_var_rate_treatment_sub1 <- mean(var_rate_treatment_rep_sub1 >= var_rate_treatment_obs_sub1)
ppc_stat(var_rate_control_obs_sub0, var_rate_control_rep_sub0, stat = "identity") +
  ggtitle("PPC: Variance of Event Rates (Control, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_var_rate_control_sub0, 3))) +
  xlab("Variance of Event Rates in Control Group")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(var_rate_control_obs_sub1, var_rate_control_rep_sub1, stat = "identity") +
  ggtitle("PPC: Variance of Event Rates (Control, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_var_rate_control_sub1, 3))) +
  xlab("Variance of Event Rates in Control Group")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(var_rate_treatment_obs_sub0, var_rate_treatment_rep_sub0, stat = "identity") +
  ggtitle("PPC: Variance of Event Rates (Treatment, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_var_rate_treatment_sub0, 3))) +
  xlab("Variance of Event Rates in Treatment Group")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat(var_rate_treatment_obs_sub1, var_rate_treatment_rep_sub1, stat = "identity") +
  ggtitle("PPC: Variance of Event Rates (Treatment, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_var_rate_treatment_sub1, 3))) +
  xlab("Variance of Event Rates in Treatment Group")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

6. Correlation between Control and Treatment Event Proportions

# Subgroup 0: Low_Dose == 0
sub0_idx <- which(subgroup == 0)

# Observed correlation for subgroup 0
p_control_obs_sub0 <- y_obs[sub0_idx, 1] / dat$n1[sub0_idx]
p_treatment_obs_sub0 <- y_obs[sub0_idx, 2] / dat$n2[sub0_idx]
corr_obs_sub0 <- cor(p_control_obs_sub0, p_treatment_obs_sub0)

# Replicated correlations for subgroup 0
corr_rep_sub0 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  p_control_rep <- y_mat[sub0_idx, 1] / dat$n1[sub0_idx]
  p_treatment_rep <- y_mat[sub0_idx, 2] / dat$n2[sub0_idx]
  cor(p_control_rep, p_treatment_rep)
})
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
# Convert to matrix for ppc_stat
corr_rep_sub0 <- as.matrix(corr_rep_sub0) |> na.omit()

# Compute posterior p-value for subgroup 0
p_value_corr_sub0 <- mean(corr_rep_sub0 >= corr_obs_sub0)

# Generate PPC plot for subgroup 0
ppc_stat(corr_obs_sub0, corr_rep_sub0, stat = "identity") +
  ggtitle("PPC: Correlation (Control vs. Treatment, Subgroup 0)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_corr_sub0, 3))) +
  xlab("Correlation")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Subgroup 1: Low_Dose == 1
sub1_idx <- which(subgroup == 1)

# Observed correlation for subgroup 1
p_control_obs_sub1 <- y_obs[sub1_idx, 1] / dat$n1[sub1_idx]
p_treatment_obs_sub1 <- y_obs[sub1_idx, 2] / dat$n2[sub1_idx]
corr_obs_sub1 <- cor(p_control_obs_sub1, p_treatment_obs_sub1)

# Replicated correlations for subgroup 1
corr_rep_sub1 <- apply(y_rep, 1, function(y) {
  y_mat <- matrix(y, nrow = J, ncol = 2)
  p_control_rep <- y_mat[sub1_idx, 1] / dat$n1[sub1_idx]
  p_treatment_rep <- y_mat[sub1_idx, 2] / dat$n2[sub1_idx]
  cor(p_control_rep, p_treatment_rep)
})
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero

Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
# Convert to matrix for ppc_stat
corr_rep_sub1 <- as.matrix(corr_rep_sub1) |> na.omit()

# Compute posterior p-value for subgroup 1
p_value_corr_sub1 <- mean(corr_rep_sub1 >= corr_obs_sub1)

# Generate PPC plot for subgroup 1
ppc_stat(corr_obs_sub1, corr_rep_sub1, stat = "identity") +
  ggtitle("PPC: Correlation (Control vs. Treatment, Subgroup 1)") +
  labs(subtitle = paste("Posterior p-value =", round(p_value_corr_sub1, 3))) +
  xlab("Correlation")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

7. Predicted Study-Specific Control/Treatment Probabiltities

# Compute study-specific probabilities
p_control_n <- matrix(NA, n_samples, J)
p_treatment_n <- matrix(NA, n_samples, J)

for (j in 1:J) {
  nu_control_j <- draws1[[paste0("nu[", j, ",1]")]]
  nu_treatment_j <- draws1[[paste0("nu[", j, ",2]")]]
  
  # Adjust fixed effects for subgroup
  p_control_n[, j] <- plogis(mu_control + beta_control * subgroup[j] + nu_control_j)
  p_treatment_n[, j] <- plogis(mu_treatment + beta_treatment * subgroup[j] + nu_treatment_j)
}

control_long <- data.frame(
  Probability = as.vector(p_control_n),
  Study = factor(rep(1:J, each = n_samples)),
  Group = "Control",
  Subgroup = factor(rep(subgroup, each = n_samples), levels = c(0, 1), labels = c("Subgroup 0", "Subgroup 1"))
)

treatment_long <- data.frame(
  Probability = as.vector(p_treatment_n),
  Study = factor(rep(1:J, each = n_samples)),
  Group = "Treatment",
  Subgroup = factor(rep(subgroup, each = n_samples), levels = c(0, 1), labels = c("Subgroup 0", "Subgroup 1"))
)

# Combine data for unified visualization
data_long <- rbind(control_long, treatment_long)
ggplot(control_long, aes(x = Probability, group = Study)) +
  geom_density(alpha = 0.01, linewidth = 0.03) +
  facet_grid(Group ~ Subgroup, scales = "free_y") +
  labs(title = "Density of Predicted Study-Specific Control Probabilities",
       x = "Study-Specific Probability",
       y = "Density") +
  theme_minimal() 

ggplot(treatment_long, aes(x = Probability, group = Study)) +
  geom_density(alpha = 0.01, linewidth = 0.03) +
  facet_grid(Group ~ Subgroup, scales = "free_y") +
  labs(title = "Density of Predicted Study-Specific Treatment Probabilities",
       x = "Probability",
       y = "Density") +
  theme_minimal() 

This plots shows the predicted probability density for each study as heatmaps. Each row represents a study in our sample. Red points depict the observed study-specific probability.

# Heatmap with empirical probabilities and subgroup faceting
ggplot(control_long, aes(x = Probability, y = Study)) +
  geom_bin2d(binwidth = c(0.02, 1)) +
  scale_fill_gradient(low = "white", high = "blue", name = "Density") +
  geom_point(data = empirical_probabilities, aes(x = p1, y = Study), color = "red") +
  facet_grid(Group ~ Subgroup, scales = "free_y") +
  labs(title = "Heatmap of Predicted Study-Specific Control Probabilities",
       x = "Probability",
       y = "Study",
       fill = "Density") +
  theme_minimal() +
  theme(strip.text = element_text(size = 12, face = "bold"))

ggplot(treatment_long, aes(x = Probability, y = Study)) +
  geom_bin2d(binwidth = c(0.02, 1)) +
  scale_fill_gradient(low = "white", high = "blue", name = "Density") +
  geom_point(data = empirical_probabilities, aes(x = p2, y = Study), color = "red") +
  facet_grid(Group ~ Subgroup, scales = "free_y") +
  labs(title = "Heatmap of Predicted Study-Specific Treatment Probabilities",
       x = "Probability",
       y = "Study",
       fill = "Density") +
  theme_minimal() +
  theme(strip.text = element_text(size = 12, face = "bold"))